import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import pickle
from decision_company import read_csv_file, convert_to_datetime, create_dataframe, col_copy, concatenate_objects, extract_year, bind_dataframe, aggregate_data, create_figure, draw_lineplot, set_plot_title, set_plot_ylabel, show_plots, linear_regression, save_plot

# Load the match data and parse 'Date' so the year can be extracted below.
atp_tennis = read_csv_file('atp_tennis.csv')
atp_tennis['Date'] = convert_to_datetime(atp_tennis['Date'])

# Common column names shared by the Player_1 and Player_2 views so the two
# frames line up when they are concatenated.
player_columns = ['Player', 'Rank', 'Win_Loss_Ratio', 'Surface', 'Date']

# NOTE(review): assumes atp_tennis.csv already provides the
# 'Win_Loss_Ratio_1' / 'Win_Loss_Ratio_2' columns -- confirm against the data.

# Player_1 view of every row, renamed to the common column names
player_data_1 = col_copy(atp_tennis, ['Player_1', 'Rank_1', 'Win_Loss_Ratio_1', 'Surface', 'Date'])
player_data_1.columns = player_columns

# Player_2 view of every row, renamed to the common column names
player_data_2 = col_copy(atp_tennis, ['Player_2', 'Rank_2', 'Win_Loss_Ratio_2', 'Surface', 'Date'])
player_data_2.columns = player_columns

# Combine both views into a single per-player table
player_data = concatenate_objects(player_data_1, player_data_2)

# Group by year and surface type, and compute the average win/loss ratio.
# reset_index() turns the group keys back into regular 'Year' / 'Surface'
# columns, which the plotting and regression steps select by name.
player_data['Year'] = extract_year(player_data, 'Date')
grouped_data = bind_dataframe(player_data, ['Year', 'Surface'])
agg_dict = {'Win_Loss_Ratio': 'mean'}
yearly_surface_data = aggregate_data(grouped_data, agg_dict).reset_index()

# Line chart of the average win/loss ratio per year, split by surface via the
# `hue` argument. The figure is written to disk first and then displayed.
figure_size = (12, 6)
create_figure(figsize=figure_size)
draw_lineplot(
    data=yearly_surface_data,
    x='Year',
    y='Win_Loss_Ratio',
    hue='Surface',
)
set_plot_title('Win/Loss Ratios by Surface Type and Year')
set_plot_ylabel('Average Win/Loss Ratio')
save_plot("./ref_result/lineplot.png")
show_plots()

# Perform a linear regression for each surface type to analyze trends:
# average win/loss ratio regressed on year, keeping the slope and p-value.
trend_results = {}
for surface in ['Hard', 'Clay', 'Grass']:
    # Rows of the yearly averages that belong to this surface only
    surface_data = yearly_surface_data[yearly_surface_data['Surface'] == surface]
    slope, intercept, r_value, p_value, std_err = linear_regression(surface_data['Year'], surface_data['Win_Loss_Ratio'])
    trend_results[surface] = {'Slope': slope, 'P-value': p_value}

print(trend_results)

# Use a context manager so the file handle is flushed and closed
# deterministically instead of being left to garbage collection.
with open("./ref_result/trend_results.pkl", "wb") as results_file:
    pickle.dump(trend_results, results_file)